library(tidyverse)
library(parlitools)
library(tidylog)
library(haven)
library(lubridate)

# LOAD CROSS-SECTIONAL CLIMATE TWEETS DATA
# load() restores whatever objects were saved in the .Rdata file; MPtweets,
# used below, is presumably one of them since it is not created in this script.
load("data/analysis/MPtweetsv2.Rdata")
# pattern passed to grepl() further down to flag climate-related text
climterms <- readRDS("data/output/climgenterms.rds")

# LOAD CROSS-SECTIONAL CROSS-WALK
MPcs <- read_csv("data/analysis/MP_cs_all.csv")
# only the MP identifier is kept here, so the join below adds no new columns
MPcw <- select(MPcs, about)

MPtweets_hansm <- MPtweets %>%
  left_join(MPcw, by = "about")

# GET FFF EVENTS WITH DATES AND GSS CODE
# Collapse the event list to one row per date and GSS code, counting how many
# events fall in each date/gss_code cell.
fff_events_dates <- read_csv("data/output/fff_events_dates.csv")
fff_events_dates <- fff_events_dates %>%
  mutate(obs = 1) %>%
  group_by(date, gss_code) %>%
  # drop the grouping so the result is a plain tibble rather than one that is
  # still grouped by date
  summarise(sum_fff_events = sum(obs), .groups = "drop")

# GET DAILY COUNT OF CLIMATE TWEETS PER MP
# One row per date and MP with the number of that MP's tweets matching the
# climate pattern; MP-days without a matching tweet do not appear here.
# NOTE(review): grepl() takes a single pattern -- assumes climterms is one
# regex string; confirm against the script that writes climgenterms.rds.
MPclimtweetdays <- MPtweets_hansm %>%
  filter(grepl(climterms, tweet, ignore.case = TRUE)) %>%
  mutate(obs = 1) %>%
  group_by(date, about) %>%
  # return an ungrouped tibble instead of one still grouped by date
  summarise(sum_ctweets = sum(obs), .groups = "drop")

# GET DAILY MP DATA WITH TWEET SUMS BY DAY
# Build a balanced MP x day panel for 2017-06-08 to 2019-12-12: every MP gets
# a row for every day in the window, with sum_tweets = 0 on days without tweets.
MPtweetdays <- MPtweets_hansm %>%
  mutate(obs = 1) %>%
  group_by(about) %>%
  # add the days on which an MP did not tweet; those rows get obs = 0
  complete(
    date = seq.Date(as.Date("2017-06-08"), as.Date("2019-12-12"), by = "day"),
    fill = list(obs = 0)
  ) %>%
  group_by(date, about) %>%
  # ungroup here: otherwise the leftover grouping by date is carried through
  # every later join into the panel that gets saved
  summarise(sum_tweets = sum(obs), .groups = "drop") %>%
  arrange(about, date)

# ADD CLIMATE TWEET COUNTS TO THE DAILY TWEET PANEL
MPctdays <- MPtweetdays %>%
  left_join(MPclimtweetdays, by = c("date", "about"))
# MP-days without climate tweets have no match in the join (NA): set to 0
MPctdays$sum_ctweets[is.na(MPctdays$sum_ctweets)] <- 0
# share of the day's tweets that mention climate
MPctdays$propcts <- MPctdays$sum_ctweets / MPctdays$sum_tweets
# days without any tweets give 0/0 = NaN, which is.na() also flags: set to 0
MPctdays$propcts[is.na(MPctdays$propcts)] <- 0

# ADD FFF EVENTS TO THE PANEL
# the cross-walk supplies each MP's gss_code, which is the key (together with
# date) for matching events
MPcw <- select(MPcs, about, gss_code)
MPctdays <- MPctdays %>%
  left_join(MPcw, by = "about")
MPctdaysfff <- MPctdays %>%
  left_join(fff_events_dates, by = c("date", "gss_code"))
# MP-days without a matching event row are NA after the join: set to 0
MPctdaysfff$sum_fff_events[is.na(MPctdaysfff$sum_fff_events)] <- 0
# binary indicator: 1 if at least one FFF event that day, else 0
MPctdaysfff$fff_event <- as.numeric(MPctdaysfff$sum_fff_events > 0)

# GET DAILY COUNT OF CLIMATE SPEECHES PER MP
hans_ids <- unique(MPctdays$about)
speeches <- read_csv("data/output/speeches.csv")
# filter out MPs not in final MP tweets panel
speeches <- speeches %>%
  filter(about %in% hans_ids)

# One row per date and MP with the number of speeches whose text matches the
# climate pattern; MP-days without a matching speech do not appear here.
# NOTE(review): grepl() takes a single pattern -- assumes climterms is one
# regex string; confirm.
MPclimspchdays <- speeches %>%
  filter(grepl(climterms, speech_text, ignore.case = TRUE)) %>%
  mutate(obs = 1,
         date = speech_date) %>%
  group_by(date, about) %>%
  # return an ungrouped tibble instead of one still grouped by date
  summarise(sum_cspchs = sum(obs), .groups = "drop")

# GET DAILY MP DATA WITH SPEECH SUMS BY DAY
# One row per date and MP with the total number of speeches that day; only
# MP-days with at least one speech appear here.
MPspchdays <- speeches %>%
  mutate(obs = 1,
         date = speech_date) %>%
  group_by(date, about) %>%
  # return an ungrouped tibble instead of one still grouped by date
  summarise(sum_spchs = sum(obs), .groups = "drop") %>%
  arrange(about, date)

# ADD SPEECH COUNTS TO THE PANEL
# first the climate-speech counts, then the overall speech counts
MPctdayspchs <- MPctdaysfff %>%
  left_join(MPclimspchdays, by = c("date", "about")) %>%
  left_join(MPspchdays, by = c("date", "about"))
# MP-days without speeches have no match in the joins (NA): set both to 0
MPctdayspchs$sum_cspchs[is.na(MPctdayspchs$sum_cspchs)] <- 0
MPctdayspchs$sum_spchs[is.na(MPctdayspchs$sum_spchs)] <- 0

# ADD MP-LEVEL ATTRIBUTES FROM THE CROSS-WALK
MPcw <- select(MPcs, about, username, full_name, constituency_name,
               party_value, gender)
# attach the attributes, then keep the panel columns in their final order
MPcpanel <- MPctdayspchs %>%
  left_join(MPcw, by = "about") %>%
  select(about, full_name, username, gender, party_value,
         date, sum_tweets, sum_ctweets, propcts, sum_spchs, sum_cspchs,
         fff_event, sum_fff_events, constituency_name, gss_code)

# INCORPORATE MP POSITIONS DATA
add_mp <- read_csv("data/raw/MP_positions.csv")
add_mp$start <- dmy(add_mp$start)
add_mp$end <- dmy(add_mp$end)
# positions without an end date are treated as running to the end of the
# observation window
add_mp$end[is.na(add_mp$end)] <- as.Date("2019-12-12")

# fill in dates between start and end
# Each row of the positions file is expanded on its own (spell id), so two
# separate spells in the same position are not merged into one continuous
# spell covering the gap between them.
add_mp_seq <- add_mp %>%
  mutate(spell = row_number()) %>%
  pivot_longer(cols = c(start, end), names_to = "var", values_to = "date") %>%
  select(-var) %>%
  group_by(spell, full_name, position, category) %>%
  complete(date = seq.Date(min(date), max(date), by = "day")) %>%
  ungroup() %>%
  select(-spell) %>%
  # explicit tie-break order for days on which an MP holds several positions
  arrange(date, full_name, position, category) %>%
  # remove duplicate dates for position change date: keep one row per MP-day
  distinct(full_name, date, .keep_all = TRUE)

# fill in dates for observation period
add_mp_seqf <- add_mp_seq %>%
  filter(date >= as.Date("2017-06-08") & date <= as.Date("2019-12-12")) %>%
  arrange(full_name, date) %>%
  group_by(full_name) %>%
  complete(
    date = seq.Date(as.Date("2017-06-08"), as.Date("2019-12-12"), by = "day")
  ) %>%
  ungroup()
# merge with panel
MPcpanel_add <- left_join(MPcpanel, add_mp_seqf, by = c("full_name", "date"))
# the only rows present in y but not in x belong to Edward Timpson; the left
# join drops them, which is safe

# INCORPORATE ELECTORAL DATA
resultsbes17 <- readRDS("data/output/elec_results17_bes.RDS")

# Constituency names spelled differently in the election results (left) and
# the spelling they are changed to before joining on constituency_name (right).
bes_name_fixes <- c(
  "St Austell and Newquay"      = "St. Austell and Newquay",
  "St Helens South and Whiston" = "St. Helens South and Whiston",
  "St Helens North"             = "St. Helens North",
  "Holborn and St Pancras"      = "Holborn and St. Pancras",
  "St Ives"                     = "St. Ives",
  "Fermanagh and South Tyrone"  = "Fermanagh & South Tyrone",
  "Ynys Môn"                    = "Ynys Mon",
  "Weston-super-Mare"           = "Weston-Super-Mare",
  "Newry and Armagh"            = "Newry & Armagh",
  "Bury St Edmunds"             = "Bury St. Edmunds"
)
# rewrite only the names that appear in the lookup; all others stay untouched
needs_fix <- resultsbes17$constituency_name %in% names(bes_name_fixes)
resultsbes17$constituency_name[needs_fix] <- unname(
  bes_name_fixes[as.character(resultsbes17$constituency_name[needs_fix])]
)

MPcpanel_add_elec <- MPcpanel_add %>%
  left_join(resultsbes17, by = c("constituency_name"))

# remove observations for MPs who left before 2019 election
# Last date each departing MP is kept in the panel; later rows are dropped.
mp_exit_dates <- c(
  "Fiona Onasanya"  = "2019-01-28",
  "Heidi Alexander" = "2018-06-13",
  "Barry McElduff"  = "2018-01-15",
  "Paul Flynn"      = "2019-02-17"
)
# per-row exit date: NA for every MP not listed above (and for NA names)
row_exit <- as.Date(
  unname(mp_exit_dates[as.character(MPcpanel_add_elec$full_name)]),
  format = "%Y-%m-%d"
)
# %in% TRUE maps NA comparisons to FALSE, so rows with a missing name are kept
# as they are instead of being turned into all-NA rows by an NA row index
left_panel <- (MPcpanel_add_elec$date > row_exit) %in% TRUE
MPcpanel_add_elec <- MPcpanel_add_elec[!left_panel, ]

# write the panel as .csv, .RDS and .dta
write_csv(MPcpanel_add_elec, "data/analysis/MP_panelv2.csv")
saveRDS(MPcpanel_add_elec, "data/analysis/MP_panelv2.RDS")
write_dta(MPcpanel_add_elec, "data/analysis/MP_panelv2.dta")

# get share of the observation window each MP spent in each position category
# One row per MP (about) and one column per category found in the panel;
# MPs who never have a non-missing category do not appear.
posdays <- MPcpanel_add_elec %>%
  filter(!is.na(category)) %>%
  # drop any grouping inherited from earlier steps so count() tallies per
  # MP and category only
  ungroup() %>%
  count(about, category) %>%
  # categories an MP never held become 0; columns are sorted by category name
  pivot_wider(names_from = category, values_from = n,
              values_fill = list(n = 0L), names_sort = TRUE)

# days in the observation window, both end dates included (918)
n_window_days <- as.integer(as.Date("2019-12-12") - as.Date("2017-06-08")) + 1L

# convert day counts to shares of the window for every category column
posdays <- posdays %>%
  mutate(across(-about, ~ .x / n_window_days))

write_csv(posdays, 'data/output/MP_posdays.csv')
